# NOTE: you should use cleaned up data eventually here



# Load Data
setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data")  # Office computer directory
data <- get(load("ca_enrollment_data_AUG022019")) # individual-level data
households <- get(load("ca_household_data_AUG022019")) # household-level data
plan_data <- read.csv("ca_plan_data2.csv") # Covered California plan data
zip3_choices <- read.csv("zip3_choices2.csv",row.names = 1) # choice set by 3 digit zip and rating area
product_definitions <- read.csv("product_definitions.csv",row.names = 1) # definitions of column names in zip3_choices
network_data <- read.csv("ca_network_data3.csv",row.names=1) # CA network data
outside_logit <- get(load("sipp_logit"))
library(plyr)

# We can drop all uninsured records for this analysis
data <- data[!is.na(data$plan_id),]

# NOTE: I am reordering plan data because I defined a plan_id number earlier based on a previous ordering
plan_data$Plan_ID_Order <- as.character(plan_data$Plan_ID_Order)
rownames(plan_data) <- paste(plan_data$Plan_Name,plan_data$ENROLLMENT_YEAR,
	plan_data$region,sep="")
plan_data <- plan_data[plan_data$Plan_ID_Order,]
		

plan_data$Issuer_Name <- as.character(plan_data$Issuer_Name)
plan_data[plan_data$Issuer_Name == "Anthem Blue Cross","Issuer_Name"] <- "Anthem"
plan_data[plan_data$Issuer_Name == "Blue Shield","Issuer_Name"] <- "Blue_Shield"
plan_data[plan_data$Issuer_Name == "Chinese Community","Issuer_Name"] <- "Chinese_Community"
plan_data[plan_data$Issuer_Name == "Contra Costa Health Plan","Issuer_Name"] <- "Contra_Costa"
plan_data[plan_data$Issuer_Name == "Health Net","Issuer_Name"] <- "Health_Net"
plan_data[plan_data$Issuer_Name == "LA Care","Issuer_Name"] <- "LA_Care"
plan_data[plan_data$Issuer_Name == "UnitedHealthcare","Issuer_Name"] <- "United"
plan_data[plan_data$Issuer_Name == "Western Health","Issuer_Name"] <- "Western"
plan_data[plan_data$Issuer_Name == "Sharp","Issuer_Name"] <- "SHARP"

# Insurer market share by year

	data$insurer_small <- data$insurer
	data[!data$insurer_small %in%  c("Anthem","Blue_Shield","Health_Net","Kaiser"),"insurer_small"] <- "Other"
	data$insurer_small_year <- paste(data$insurer_small,data$year,sep="_")
	
	years <- c(2014:2019)
	insurers_small <- c("Anthem","Blue_Shield","Health_Net","Kaiser","Other")
	output <- matrix(NA,length(insurers_small),length(years),dimnames=list(insurers_small,years))
	output[,"2014"] <- by(data[data$year == 2014,"insurer_small"],data[data$year == 2014,"insurer_small"],length)
	output[,"2015"] <- by(data[data$year == 2015,"insurer_small"],data[data$year == 2015,"insurer_small"],length)
	output[,"2016"] <- by(data[data$year == 2016,"insurer_small"],data[data$year == 2016,"insurer_small"],length)
	output[,"2017"] <- by(data[data$year == 2017,"insurer_small"],data[data$year == 2017,"insurer_small"],length)
	output[,"2018"] <- by(data[data$year == 2018,"insurer_small"],data[data$year == 2018,"insurer_small"],length)
	output[,"2019"] <- by(data[data$year == 2019,"insurer_small"],data[data$year == 2019,"insurer_small"],length)
	
	write.csv(output,"enrollment_by_insurer.csv")
	
# Create Age Group Variable

	data$age_group <- NA
	data[data$AGE < 18 & !is.na(data$AGE),"age_group"] <- "0to17"
	data[data$AGE >= 18 & data$AGE < 26 & !is.na(data$AGE),"age_group"] <- "18to25"
	data[data$AGE >= 26 & data$AGE < 35 & !is.na(data$AGE),"age_group"] <- "26to34"
	data[data$AGE >= 35 & data$AGE < 45 & !is.na(data$AGE),"age_group"] <- "35to44"
	data[data$AGE >= 45 & data$AGE < 55 & !is.na(data$AGE),"age_group"] <- "45to54"
	data[data$AGE >= 55 & data$AGE < 65 & !is.na(data$AGE),"age_group"] <- "55to64"
	data[data$AGE >= 65 & data$AGE < 120 & !is.na(data$AGE),"age_group"] <- "65plus"
	
	
# Metal

	plan_data$metal <- as.character(plan_data$metal_level)
	plan_data[plan_data$metal %in% c("Silver - Enhanced 73",
		"Silver - Enhanced 87","Silver - Enhanced 94"),"metal"] <- "Silver"
	
#### Compare Demographics of OEP vs. SEP

	metals <- c("Catastrophic","Bronze","Silver","Gold","Platinum")
	network_types <- c("HMO","PPO")
	msps <- c("non-MSP","MSP")
	hsas <- c("non-HSA","HSA")
	plan_parameters <- c("Average standardized premium","Average deductible","Average max. out-of-pocket","Average coinsurance")
	insurers <- c("Anthem","Blue_Shield","Chinese_Community","Contra_Costa",
		"Health_Net","Kaiser","LA_Care","Molina","Oscar","SHARP","United","Valley","Western")
	breadth_cats <- c("< 10%","10% to 20%","20% to 30%","> 30%")
	inclus_cats <- c("inclus < 10%","inclus 10% to 20%","inclus 20% to 30%","inclus > 30%")
	#network_measures <- c("Network Breadth","Network Inclusivity")
	
	age_groups <- c("0to17","18to25","26to34","35to44","45to54","55to64","65plus")
	income_groups <- c("138% FPL or less","138% FPL to 150% FPL","150% FPL to 200% FPL",
		"200% FPL to 250% FPL","250% FPL to 400% FPL","400% FPL or greater")
	genders <- c("Female","Male")
	smoker_groups <- c("Non-Smoker","Smoker")
	race_groups <- c("Asian","Black/African American",
		"Hispanic","Other Race","White")
	languages <- c("English","Spanish","Other Language")
	years <- c("2014","2015","2016","2017","2018","2019")
	#subsidy_groups <- c("Eligible for PTCs","Eligible for CSRs","Exempt from Mandate","Subject to Mandate")
	subsidy_groups <- c("Subsidized","Unsubsidized")
	
	product_fields <- c(metals,network_types,breadth_cats,inclus_cats)
	product_fields_inertia <- c(metals,insurers_small,network_types,breadth_cats,inclus_cats)
	fields <- c("Total population",product_fields,years,income_groups,subsidy_groups,age_groups,genders,race_groups,languages)

	# Create/modify variables
	
		# Genders
		data$Gender <- data$gender
		data$Gender <- as.character(data$Gender)
		data[data$gender == "1" & !is.na(data$Gender),"gender"] <- "Male"
		data[data$gender == "0" & !is.na(data$Gender),"gender"] <- "Female"
		
		# Metal
		data$metal <- data$metal_level_enhanced
		data[data$metal == "Minimum Coverage","metal"] <- "Catastrophic"
		data[data$metal %in% c("Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94")  & !is.na(data$metal),"metal"] <- "Silver"
	
		# Subsidized/Unsubsidized
		
		data$subsidy_eligible <- as.numeric(households[data$household_year,"subsidized_members"] > 0)
		data$subsidized <- as.character(data$subsidy_eligible) 
		data[data$subsidized == "1" & !is.na(data$subsidized),"subsidized"] <- "Subsidized"
		data[data$subsidized == "0" & !is.na(data$subsidized),"subsidized"] <- "Unsubsidized"
	
		# CSR Eligible
		data$csr_eligible <- as.numeric(data$subsidized == "Subsidized" & data$FPL <= 2.50)
	
		
		# Plan network type/etc
		plan_data$PLAN_NETWORK_TYPE <- as.character(plan_data$PLAN_NETWORK_TYPE)
		data$plan_network_type <- NA
		data[!is.na(data$plan_id),"plan_network_type"] <- plan_data[data[!is.na(data$plan_id),"plan_id"],"PLAN_NETWORK_TYPE"]
	
		data$HMO <- "HMO"
		data[data$plan_network_type != "HMO","HMO"] <- "PPO"

	
		# Income Groups
		data$subsidy_fpl_bracket <- NA
		data[data$FPL <= 1.38 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "138% FPL or less"
		data[data$FPL > 1.38 & data$FPL <= 1.50 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "138% FPL to 150% FPL"
		data[data$FPL > 1.50 & data$FPL <= 2.00 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "150% FPL to 200% FPL"
		data[data$FPL > 2.00 & data$FPL <= 2.50 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "200% FPL to 250% FPL"
		data[data$FPL > 2.50 & data$FPL <= 4.00 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "250% FPL to 400% FPL"
		data[data$FPL > 4.00 & !is.na(data$FPL),"subsidy_fpl_bracket"] <- "400% FPL or greater"
	
		# Language
		data$language <- as.character(data$language_spoken)
		data[data$language == "English","language"] <- "English"
		data[data$language == "Spanish","language"] <- "Spanish"
		data[!data$language %in% c("(nonres","English","Spanish"),"language"] <- "Other Language"
		data[data$language == "(nonres","language"] <- NA
	
		# Previous plan
		data$previous_plan_number <- households[data$household_year,"previous_plan_number"]
	
		# Dominated Plan
		data$dominated_choice <- NA
		
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket %in% c("138% FPL or less","138% FPL to 150% FPL") &
			data$metal  %in% c("Gold","Platinum"),"dominated_choice"] <- 1
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket %in% c("138% FPL or less","138% FPL to 150% FPL") &
			!data$metal  %in% c("Gold","Platinum"),"dominated_choice"] <- 0
	
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket == "150% FPL to 200% FPL" &
			data$metal  %in% c("Gold"),"dominated_choice"] <- 1
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket == "150% FPL to 200% FPL" &
			!data$metal  %in% c("Gold"),"dominated_choice"] <- 0
	
		# CSR choose bronze_flag
		data$csr_chose_bronze <- NA
		
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket %in% c("138% FPL or less","138% FPL to 150% FPL","150% FPL to 200% FPL") &
			data$metal  %in% c("Bronze","Catastrophic"),"csr_chose_bronze"] <- 1
		data[data$csr_eligible == 1 & !is.na(data$csr_eligible == 1) & 
			data$subsidy_fpl_bracket %in% c("138% FPL or less","138% FPL to 150% FPL","150% FPL to 200% FPL") &
			!data$metal  %in% c("Bronze","Catastrophic"),"csr_chose_bronze"] <- 0
	
		# Add Network variables
		plan_data[is.na(plan_data$Network_num),"Network_num"] <- 1
		network_data_names <- paste(data$insurer,plan_data[data$plan_id,"PLAN_NETWORK_TYPE"],plan_data[data$plan_id,"Network_num"],
			plan_data[data$plan_id,"HSA"],data$year,data$rating_area,data$metal,data$zip3,sep="_")
		
			# Most of the mismatching network names are catastrophic; assign them bronze for this purpose
			missing_catastrophic <- which(!network_data_names %in% rownames(network_data) & data$year < 2019 & data$metal  == "Catastrophic")
			data$metal_netdata <- data$metal
			data[data$metal_netdata == "Catastrophic","metal_netdata"] <- "Bronze"
			data$network_data_name <- paste(data$insurer,plan_data[data$plan_id,"PLAN_NETWORK_TYPE"],plan_data[data$plan_id,"Network_num"],
				plan_data[data$plan_id,"HSA"],data$year,data$rating_area,data$metal_netdata,data$zip3,sep="_")
			data$netdata_available <- network_data_names %in% rownames(network_data)
		
		data$network_breadth <- data$network_inclus <- NA
		data[data$netdata_available,"network_breadth"] <- network_data[data[data$netdata_available,"network_data_name"],"netbreadth_zip3_RA"] 
		data[data$netdata_available,"network_inclus"] <- 100 * network_data[data[data$netdata_available,"network_data_name"],"net_inclusivity"] 	
			
		data$network_breadth_cat <- data$network_inclus_cat <- NA
		data[data$network_breadth < 10 & !is.na(data$network_breadth),"network_breadth_cat"] <- "< 10%"
		data[data$network_breadth >= 10 & data$network_breadth < 20 & !is.na(data$network_breadth),"network_breadth_cat"] <- "10% to 20%"
		data[data$network_breadth >= 20 & data$network_breadth < 30 & !is.na(data$network_breadth),"network_breadth_cat"] <- "20% to 30%"
		data[data$network_breadth >= 30 & !is.na(data$network_breadth),"network_breadth_cat"] <- "> 30%"
		data[data$network_inclus < 10 & !is.na(data$network_inclus),"network_inclus_cat"] <- "inclus < 10%"
		data[data$network_inclus >= 10 & data$network_inclus < 20 & !is.na(data$network_inclus),"network_inclus_cat"] <- "inclus 10% to 20%"
		data[data$network_inclus >= 20 & data$network_inclus < 30 & !is.na(data$network_inclus),"network_inclus_cat"] <- "inclus 20% to 30%"
		data[data$network_inclus >= 30 & !is.na(data$network_inclus),"network_inclus_cat"] <- "inclus > 30%"

	
	compute_population_choices <- function(data,fields) {
		
		pop_output <- rep(NA,length(fields))
		names(pop_output) <- fields
	
		# Total Population
		pop_output["Total Enrollment"] <- nrow(data)

		#data <- data[!data$flagged,]
		
		# Metals
		if(any(fields %in% metals)) {
			pop_output[metals] <- by(data$metal,data$metal,length)[metals]/
				(nrow(data) - length(which(is.na(data$metal))))
		}
		
		# Plan Parameters
		if(any(plan_parameters %in% fields)) {
			pop_output["Average standardized premium"] <- weighted.mean(plan_data[data$plan_unique_id,"Premium"],data$PERWT,na.rm=T)
			pop_output["Average deductible"] <- weighted.mean(plan_data[data$plan_unique_id,"Deductible"],data$PERWT,na.rm=T)
			pop_output["Average max. out-of-pocket"] <- weighted.mean(plan_data[data$plan_unique_id,"OOPMAX"],data$PERWT,na.rm=T)
			pop_output["Average coinsurance"] <- weighted.mean(plan_data[data$plan_unique_id,"ER_coins"],data$PERWT,na.rm=T)
		}
	
		# Network Types
		if(any(network_types %in% fields)) {
			pop_output[network_types] <- by(data$HMO,data$HMO,length)[network_types]/
				(nrow(data) - length(which(is.na(data$HMO))))
		}
	
		# Network Measures
		if(any(breadth_cats %in% fields)) {
			pop_output[breadth_cats] <- by(data$network_breadth_cat,data$network_breadth_cat,length)[breadth_cats]/ 
				(nrow(data) - length(which(is.na(data$network_breadth_cat))))
		}
		if(any(inclus_cats %in% fields)) {
			pop_output[inclus_cats] <- by(data$network_inclus_cat,data$network_inclus_cat,length)[inclus_cats]/ 
				(nrow(data) - length(which(is.na(data$network_inclus_cat))))
		}
		
		# Insurers
		if(any(insurers_small %in% fields)) {
			insurers_big4 <- insurers_small[1:(length(insurers_small)-1)]
			pop_output[insurers_big4] <- by(data$insurer_small,data$insurer_small,length)[insurers_big4]/
				(nrow(data) - length(which(is.na(data$insurer_small))))
			pop_output["Other"] <- 1 - sum(pop_output[insurers_big4])
		}
	
		# Insurers
		#if(any(insurers %in% fields)) {
		#	pop_output[insurers] <- by(data$insurer,data$insurer,length)[insurers]/
		#		(nrow(data) - length(which(is.na(data$insurer))))
		#}
	
		# Years
		if(any(years %in% fields)) {
			pop_output[years] <- by(data$year,data$year,length)[years]/
				(nrow(data) - length(which(is.na(data$year))))
		}
	
		# Income Groups
		if(any(income_groups %in% fields)) {
			pop_output[income_groups] <- 
				by(data$subsidy_fpl_bracket,data$subsidy_fpl_bracket,length)[income_groups]/
				(nrow(data) - length(which(is.na(data$subsidy_fpl_bracket))))
		}
	
		# Age Groups
		if(any(age_groups %in% fields)) {
			pop_output[age_groups] <- by(data$age_group,data$age_group,length)[age_groups]/
				(nrow(data) - length(which(is.na(data$age_group))))
		}
	
		# Genders
		if(any(genders %in% fields)) {
			pop_output[genders] <- by(data$gender,data$gender,length)/
				(nrow(data) - length(which(is.na(data$gender))))
		}
	
		# Race Groups
		if(any(race_groups %in% fields)) {
			pop_output[race_groups] <- by(data$race,data$race,length)[race_groups]/nrow(data) 
			pop_output[race_groups] <- pop_output[race_groups]/sum(pop_output[race_groups])
			#pop_output["Unreported"] <- 1 - sum(pop_output[race_groups],na.rm=TRUE)
		}
		
		# Subsidy Groups
		if(any(subsidy_groups %in% fields)) {
			pop_output[subsidy_groups] <- by(data$subsidized,data$subsidized,length)[subsidy_groups]/
				(nrow(data) - length(which(is.na(data$subsidized))))
		}
		
		# Languages
		if(any(languages %in% fields)) {
			pop_output[languages] <- by(data$language,data$language,length)[languages]/
				(nrow(data) - length(which(is.na(data$language))))
		}
		
		# Dominated Choice
		if("dominated choice" %in% fields) {
			pop_output["dominated choice"] <- 
				length(which(data$csr_eligible == 1 & !is.na(data$csr_eligible) & 
					data$dominated_choice == 1 & !is.na(data$dominated_choice)))/
				length(which(data$csr_eligible == 1 & !is.na(data$csr_eligible) & data$FPL <= 2.00))
		}
		
		# CSR chose bronze
		if("csr_chose_bronze" %in% fields) {
			pop_output["csr_chose_bronze"] <- 
				length(which(data$csr_eligible == 1 & !is.na(data$csr_eligible) & 
					data$csr_chose_bronze == 1 & !is.na(data$csr_chose_bronze)))/
				length(which(data$csr_eligible == 1 & !is.na(data$csr_eligible) & data$FPL <= 2.00))
		}
		
		return(pop_output)
	}

# Summarize data by year (for Inertia Paper)

	fields <- c("Total Enrollment",product_fields_inertia,income_groups,subsidy_groups,age_groups,genders,race_groups)
	#cols <- c("Y2014","Y2015","Y2016","Y2017","Y2018","Y2019","Overall")
	cols <- c("Y2014","Y2015","Y2016","Y2017","Y2018","Overall")
	
	output <- matrix(NA,length(fields),length(cols),dimnames=list(fields,cols))
	output[,"Y2014"] <- compute_population_choices(data = data[data$year == 2014,],fields = fields)
	output[,"Y2015"] <- compute_population_choices(data = data[data$year == 2015,],fields = fields)
	output[,"Y2016"] <- compute_population_choices(data = data[data$year == 2016,],fields = fields)
	output[,"Y2017"] <- compute_population_choices(data = data[data$year == 2017,],fields = fields)
	output[,"Y2018"] <- compute_population_choices(data = data[data$year == 2018,],fields = fields)
	#output[,"Y2019"] <- compute_population_choices(data = data[data$year == 2019,],fields = fields)
	output[,"Overall"] <- compute_population_choices(data = data[data$year <= 2018,],fields = fields)
	
	# Add average unsubsidized and subsidized premiums
	households$unsubsidized_premium <- households$rating_factor * plan_data[households$plan_id,"Premium"]/1.278
	households[is.na(households$subsidy),"subsidy"] <- 0
	households$subsidized_premium <- pmax(0,households$unsubsidized_premium - households$subsidy)
	
	years <- c(2014:2018)
	prem_fields <- c("Unsubsidized Premium","Subsidized Premium")
	mean_premiums <- matrix(NA,length(prem_fields),length(years),dimnames=list(prem_fields,years))
	for(t in years) {
		mean_premiums["Unsubsidized Premium",as.character(t)] <- 
			weighted.mean(households[households$year == t & !is.na(households$year),"unsubsidized_premium"]/
				households[households$year == t & !is.na(households$year),"enrollees"],
				households[households$year == t & !is.na(households$year),"enrollees"],na.rm=TRUE) 
		mean_premiums["Subsidized Premium",as.character(t)] <- 
			weighted.mean(households[households$year == t & !is.na(households$year),"subsidized_premium"]/
				households[households$year == t & !is.na(households$year),"enrollees"],
				households[households$year == t & !is.na(households$year),"enrollees"],na.rm=TRUE) 
	}
	
	overall_premiums <- c(weighted.mean(households$unsubsidized_premium/households$enrollees,households$enrollees,na.rm=TRUE),
		weighted.mean(households$subsidized_premium/households$enrollees,households$enrollees,na.rm=TRUE))
	mean_premiums <- cbind(mean_premiums,overall_premiums)
	
	colnames(mean_premiums) <- cols
	output <- rbind(mean_premiums,output)
	
	write.csv(output,"basic_demographics_inertia.csv")
	

	# Average Premium for New and Returning Enrollees
	years <- c(2015:2018)
	prem_fields <- c("Unsubsidized Renewal Premium","Unsubsidized New Premium","Diff Unsubsidized","Subsidized Renewal Premium","Subsidized New Premium","Diff Subsidized")
	mean_premiums <- matrix(NA,length(years),length(prem_fields),dimnames=list(years,prem_fields))	
	
	households$new_enrollee <- 0
	households[is.na(households$previous_plan_number) & !is.na(households$plan_number_nocsr),"new_enrollee"] <- 1
	
	households$returning_enrollee <- 0
	households[!is.na(households$previous_plan_number) & !is.na(households$plan_number_nocsr),"returning_enrollee"] <- 1
	
	
	for(t in years) {
		mean_premiums[as.character(t),"Unsubsidized New Premium"] <- 
			weighted.mean(households[households$year == t & households$new_enrollee == 1,"unsubsidized_premium"]/
				households[households$year == t & households$new_enrollee == 1,"enrollees"],
				households[households$year == t & households$new_enrollee == 1,"enrollees"],na.rm=TRUE) 
		mean_premiums[as.character(t),"Unsubsidized Renewal Premium"] <- 
			weighted.mean(households[households$year == t & households$returning_enrollee == 1,"unsubsidized_premium"]/
				households[households$year == t & households$returning_enrollee == 1,"enrollees"],
				households[households$year == t & households$returning_enrollee == 1,"enrollees"],na.rm=TRUE) 
		
		mean_premiums[as.character(t),"Subsidized New Premium"] <- 
			weighted.mean(households[households$year == t & households$new_enrollee == 1,"subsidized_premium"]/
				households[households$year == t & households$new_enrollee == 1,"enrollees"],
				households[households$year == t & households$new_enrollee == 1,"enrollees"],na.rm=TRUE) 
		mean_premiums[as.character(t),"Subsidized Renewal Premium"] <- 
			weighted.mean(households[households$year == t & households$returning_enrollee == 1,"subsidized_premium"]/
				households[households$year == t & households$returning_enrollee == 1,"enrollees"],
				households[households$year == t & households$returning_enrollee == 1,"enrollees"],na.rm=TRUE) 
				
		mean_premiums[as.character(t),"Diff Unsubsidized"] <- (mean_premiums[as.character(t),"Unsubsidized New Premium"] - mean_premiums[as.character(t),"Unsubsidized Renewal Premium"])/
			mean_premiums[as.character(t),"Unsubsidized Renewal Premium"]
		mean_premiums[as.character(t),"Diff Subsidized"] <- (mean_premiums[as.character(t),"Subsidized New Premium"] - mean_premiums[as.character(t),"Subsidized Renewal Premium"])/
			mean_premiums[as.character(t),"Subsidized Renewal Premium"]
	}
	
	annual_enrollment <- by(households[households$year >= 2015 & households$year <= 2018,"enrollees"],households[households$year >= 2015 & households$year <= 2018,"year"],sum)
	annual_enrollment <- annual_enrollment/sum(annual_enrollment)
	averages <- t(mean_premiums) %*% annual_enrollment
	
	mean_premiums <- rbind(mean_premiums,0)
	mean_premiums[nrow(mean_premiums),] <- averages
	if (two_choice_sample_flag) {
		write.csv(mean_premiums,"new_renewal_premiums.csv")
	} 
	